---
fontsize: 11pt
output:
  pdf_document: default
urlcolor: red
classoption: landscape
header-includes:
  - \usepackage{bm}
  - \usepackage{pdflscape}
  - \usepackage{lscape}

---
<!-- Replication Code for Gov 2001 Class Project -->

# Extension Tables and Figures

```{r setup, include=FALSE}
#load packages
library(haven)
library(tidyverse)
library(ggeffects)
library(lfe)
library(ivreg)
library(texreg)
library(mitml)
library(ggpubr)
library(gt)
library(MatchIt)
library(cobalt)
library(dplyr)
library(gridExtra)

# set working directory to where your data is 
# must do this in several file locations, depending on your file structure
#setwd()

#### SET UP ########################################################

# read in data
seattle <- read_dta("Seattle_SSO.dta")

# turn numeric variables into factors
seattle <- seattle %>%
  mutate(across(c(2, 7:17), factor))
```



```{r matching_setup, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}
#### SET UP FOR MATCHING  ########################################################

# extract "original"/nonimputed dataset
basedata<- seattle%>%
  filter(`_mi_m`==0)

# list of observations with a baseline data point
base_list<-basedata%>%
  filter(period==0)%>%pull(respondent_id)

# list of observations (respondents) with baseline & period 1 data points
two_period_list<-basedata%>%
  filter(respondent_id %in% base_list)%>%
  filter(period == 1) %>% pull(respondent_id)

# NOTE: only 8 obs have baseline, period 1, period 2 points. 13 obs have baseline and period 2 only. 

# generate a new dataset that only contains respondents with both period 1 and period 0 observations
subset<-basedata%>%
  filter(respondent_id %in% two_period_list)

# create list of imputed datasets with only baseline observations (that are also in our subset), for matching
implist_base <- lapply(seq(1:10), function(i) {
  seattle %>%
    filter(`_mi_m` == i & period==0 & (respondent_id %in% subset$respondent_id)) %>%
    as.data.frame()
})

implist_base <- mitml::as.mitml.list(implist_base)

# turn that list into the mitml class of imputed datasets
implist <- lapply(seq(1:10), function(i) {
  seattle %>%
    filter(`_mi_m` == i ) %>%
    as.data.frame()
})

implist <- mitml::as.mitml.list(implist)
```



```{r matching_function, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}

######## MATCHING FUNCTION ######################################################
matching <- function(meth, dist){
# perform matching
matches<-with(implist_base,matchit(seattle_treat ~ age + race + female + 
                                educ + enrolled + cohabstatus + kids + 
                                manager + jobtenure + subsector),
                                method = meth, distance = dist)
# get the matches
matched_data<-lapply(seq(1:10), function(i){match.data(matches[[i]],data=implist_base[[i]])})


# now that we have matched pairs at baseline, add back in the period 1 observations, 
# now with the appropriate matched pair
# create a dataset that has each respondent id and it's pair/subclass iD
to_join<-lapply(seq(1:10), function(i)data.frame(subclass=matched_data[[i]]$subclass,respondent_id=matched_data[[i]]$respondent_id))

# do the actual merging 
left_joined<-lapply(seq(1:10), function(i)merge(y=implist[[i]],x=to_join[[i]],by="respondent_id",all.x=TRUE))

# remove any period 2 observations that are remaining post merge
left_joined<-lapply(seq(1:10), function(i)left_joined[[i]]%>%filter(period==0 | period==1))

# turn that list into the mitml class of imputed datasets
implist_period01<-mitml::as.mitml.list(left_joined)
}
```


```{r balance_table, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}

################################ COVARIATE BALANCE TABLE #######################

# create implist from matched data using nearest neighbor, mahalanobis distance
implist_period01_NNmah <- matching(nearest, mahalanobis)

# turn the mitml class of imputed datasets into a stacked data frame
# updated to do it with a for loop!
imputed_matched_data_NNmah<- data.frame()

for (i in seq(1:10)){
  imputed_matched_data_NNmah <-rbind(imputed_matched_data_NNmah,implist_period01_NNmah[[i]])
  }
  
# keep only period 0 because we are testing baseline levels
imputed_matched_NNmah_base <- imputed_matched_data_NNmah %>%
                              filter(period==0)

# make a balance table of standardized mean differences across imputed datasets
baltable_output<- bal.tab(seattle_treat ~ age + race + female + 
        educ + enrolled + cohabstatus + kids + 
        manager + jobtenure + subsector, 
        data = imputed_matched_NNmah_base, imp = "_mi_m", disp.ks = TRUE)

# extract the "Balance.Across.Imputations" which is the balance summary across imputations
balance_table <-baltable_output[["Balance.Across.Imputations"]]

# keep the columns of the min, mean, max unadjusted difference
balance_table <- balance_table[,2:7]

# add labels for categorical variables
labels <- c("18-19",
            "20-29",
            "30-39",
            "40-49",
            "50+",
            "White, Non-Hisp",
            "Black, Non-Hisp",
            "Hispanic",
            "Asian, Non-Hisp",
            "Other, or Multiracial, Non-Hisp",
            "Female",
            "HS or less",
            "Some College",
            "BA+",
            "Enrolled",
            "Married",
            "Living with a partner",
            "Not living with \n a spouse or partner",
            "Has Children",
            "Is Manager",
            "Less than 1 year",
            "1 year",
            "2 years",
            "3 years",
            "4 years",
            "5 years",
            "6 or more years",
            "Apparel",
            "Cafe",
            "Casual Dining",
            "Dept/Super Store",
            "Fast Food",
            "Grocery",
            "Hardware/Paint",
            "Health/Beauty",
            "Misc. Retail")

balance_table$labels <- labels

# make gt table
balance_table %>%
  gt(rowname_col = "labels") %>%
  tab_header(
    title = md("**Table 1. Baseline Covariate Balance Across Imputed Datasets After Matching**"),
  ) %>%
  fmt_number(
    columns = c(Min.Diff.Un, Mean.Diff.Un, Max.Diff.Un, Min.KS.Un, Mean.KS.Un, Max.KS.Un),
    decimals = 2
  ) %>%
  cols_label(
    Min.Diff.Un = "Min",
    Mean.Diff.Un = "Mean",
    Max.Diff.Un = "Max",
    Min.KS.Un = "Min",
    Mean.KS.Un = "Mean",
    Max.KS.Un = "Max"
  ) %>%
  tab_row_group(
    label = "Subsector",
    rows = 28:36
  ) %>%
  tab_row_group(
    label = "Job Tenure",
    rows = 21:27
  ) %>%
  tab_row_group(
    label = "Cohabitation",
    rows = 16:18
  ) %>%
  tab_row_group(
    label = "Education",
    rows = 12:14
  ) %>%
  tab_row_group(
    label = "Race/Ethnicity",
    rows = 6:10
  ) %>%
  tab_row_group(
    label = "Age",
    rows = 1:5
  ) %>%
  tab_spanner(
    label = "Std. Mean Diff.",
    columns = c(Min.Diff.Un, Mean.Diff.Un, Max.Diff.Un),
    gather= FALSE
  ) %>%
  tab_spanner(
    label = "K–S Statistic",
    columns = c(Min.KS.Un, Mean.KS.Un, Max.KS.Un),
    gather= FALSE)%>%
  tab_source_note(
   "Minimum, mean, and maximum standardized mean differences and Kolmogorov–Smirnov (K–S) statistics in baseline characteristics between treatment and comparison groups. Standardized mean differences and K-S statistics that are close to 0 indicate balance across treatment and comparison" )

```


\newpage




```{r DiD, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}

#################################### DiD FIGURES ###############################

# set up to replicate author's original DiD estimates

# subset data to seattle_treat==1 (treament) OR minwage_comp == 1 (control)
seattle<-seattle%>%
  filter(seattle_treat==1|minwage_comp==1)

# subset data to period=0 or period=1 only
seattle_author<-seattle%>%
  filter(period==0|period==1)

# create list of imputed datasets (list of 10 dataframes, each with n = 14,629)
implist_author <- lapply(seq(1:10), function(i) {
  seattle_author %>%
    filter(`_mi_m` == i) %>%
    as.data.frame()
})

implist_author <- mitml::as.mitml.list(implist_author)

#function to store DiD estimates and 95%CI for author's regression
run_regs_authors <- function(outvar, implist) {
  # fit linear regression
  formula_author <- paste(outvar, "~ period*seattle_treat+age+race+female+educ+enrolled+cohabstatus+kids+manager+jobtenure+subsector")
  fit <- with(implist, lm(as.formula(formula_author),weights=acs_weight))

  #store DiD estimates only 
  unpred_reg_author_est_full <- testEstimates(fit)
  unpred_reg_author_est <-as.data.frame(testEstimates(fit)$estimates[34,1])
  
  #store CI and extract for treat*post only 
  unpred_reg_author_ci <- as.data.frame(confint(unpred_reg_author_est_full))
  unpred_reg_author_ci <- as.data.frame(unpred_reg_author_ci[34,])
  
  table <-cbind(unpred_reg_author_est,unpred_reg_author_ci)
  
}


#function to store DiD estimates and 95%CI for our regression
run_regs_ours <- function(outvar, implist) {
  # fit linear regression
  formula_ours <- paste(outvar, "~ period*seattle_treat+age+race+female+educ+enrolled+cohabstatus+kids+manager+jobtenure+subsector")
  fit <- with(implist, lm(as.formula(formula_ours)))
  
  #store DiD estimates only 
  unpred_reg_ours_est_full <- testEstimates(fit)
  unpred_reg_ours_est <-as.data.frame(testEstimates(fit)$estimates[34,1])
  
  #store CI and extract for treat*post only 
  unpred_reg_ours_ci <- as.data.frame(confint(unpred_reg_ours_est_full))
  unpred_reg_ours_ci <- as.data.frame(unpred_reg_ours_ci[34,])
  
  table <-cbind(unpred_reg_ours_est,unpred_reg_ours_ci)
  
}
  
  outcome_names <- c("shortnotice", "timing", "timingpay", "oncall", "clopen", 
                  "cancelpay", "unpred_scale")
  outcome_labels <- c("less than 2 weeks' notice", "shift change", "shift change without pay",
  "clopening shift", "on-call shift", "cancellation without pay", "unpredictability scale")
  
```

```{r figure1, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}

################################################################################
##Figure 1
  
  #apply to author's dataset
  vals_authors<-lapply(outcome_names,run_regs_authors, implist=implist_author)
  #apply to our matched dataset
  vals_ours<-lapply(outcome_names,run_regs_ours, implist=implist_period01_NNmah)
  
  #combine results
  total_vals <- rbind(vals_authors,vals_ours)
  table <-bind_rows(total_vals)
  table <- rename(table, lower = "2.5 %",
                  upper = "97.5 %",
                  estimate = "testEstimates(fit)$estimates[34, 1]")
  table$outcome <- rep(outcome_labels,2)
  table$group <- rep(c("author's estimate", "our estimate"), each=7)

```
\newpage

```{r figure1a, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE,fig.width=11,fig.height=7}
 #plot DiD estimates
figure1a<-table%>%
    filter(outcome=="unpredictability scale")%>%
ggplot(aes(x = group, y = estimate, colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymax = upper, ymin = lower), size = 1) +
  geom_hline(yintercept = 0, linetype = "dotted", col = "red") +
  theme_minimal() +
  theme(legend.position = "right") +
  xlab("") +
  ylab("Difference-in-differences estimate") +
  facet_wrap(~outcome, ncol = 7,labeller =  label_wrap_gen(10)) +
  ggtitle("Figure 1: Comparision of difference-in-difference estimates for scheduling outcomes") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"),legend.title = element_blank()) +
  scale_color_brewer(palette = "Paired") +
  theme(axis.text.x = element_blank(), panel.border = element_rect(
    fill = NA, color = "gray", size = 0.5,
    linetype = "solid"
  ))

################################################################################
##Figure 1B

 #plot DiD estimates
figure1b<-table%>%
    filter(outcome!="unpredictability scale")%>%
ggplot(aes(x = group, y = estimate, colour = group)) +
  geom_point() +
  geom_errorbar(aes(ymax = upper, ymin = lower), size = 1) +
  geom_hline(yintercept = 0, linetype = "dotted", col = "red") +
  theme_minimal() +
  theme(legend.position = "right", legend.title = element_blank()) +
  xlab("") +
  ylab("Difference-in-differences estimate") +
  facet_wrap(~outcome, ncol = 7,labeller =  label_wrap_gen(10)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold")) +
  scale_color_brewer(palette = "Paired") +
  theme(axis.text.x = element_blank(), panel.border = element_rect(
    fill = NA, color = "gray", size = 0.5,
    linetype = "solid"
  ))

grid.arrange(figure1a, figure1b, nrow = 2,layout_matrix = rbind(c(NA, 1, NA),
                        c(2))
)

```
Figure 1: Difference-in-difference estimates comparing the author's simplified regression model and our model are presented at one year. Estimates are regression-adjusted to control for demographic and work characteristics. 95% confidence intervals are indicated by vertical bars. The red dotted horizontal lines indicate no effect on the outcome as a result of the Seattle Secure Scheduling ordinance.

\newpage

```{r figure2, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE,fig.width=9,fig.height=5}

################################################################################
## Figure 2

outcome_names <- c("happy","distressed","goodsleep","hardship")
outcome_labels <- c("happy", "distressed", "good sleep", "hardship")

#apply to author's dataset
vals_authors<-lapply(outcome_names,run_regs_authors, implist=implist_author)
#apply to our matched dataset
vals_ours<-lapply(outcome_names,run_regs_ours, implist=implist_period01_NNmah)

#combine results
total_vals <- rbind(vals_authors,vals_ours)
table <-bind_rows(total_vals)
table <- rename(table, lower = "2.5 %",
                upper = "97.5 %",
                estimate = "testEstimates(fit)$estimates[34, 1]")
table$outcome <- rep(outcome_labels,2)
table$group <- rep(c("author's estimate", "our estimate"), each=4)

#plot DiD estimates
ggplot(table, aes(x = group, y = estimate, colour=group)) + 
  geom_point() +
  geom_errorbar(aes(ymax = upper, ymin = lower),size=1) +
  geom_hline(yintercept = 0, linetype = 'dotted', col = 'red') +
  theme_minimal() +
  theme(legend.position = "right",legend.title = element_blank()) + 
  xlab("") + ylab("Difference-in-differences estimate") + 
  facet_wrap(~outcome, ncol=4) +
  ggtitle("Figure 2: Comparision of difference-in-difference estimates for worker well-being outcomes") +
  theme(axis.text.x = element_blank(),plot.title = element_text(hjust = 0.5, face = "bold"),panel.border = element_rect(
    fill = NA, color = "gray", size = 0.5,
    linetype = "solid"
  )) +
  scale_color_brewer(palette = 'Paired') 

```
Figure 2: Difference-in-difference estimates comparing the author's simplified regression model and our model are presented at one year. Estimates are regression-adjusted to control for demographic and work characteristics. 95% confidence intervals are indicated by vertical bars. The red dotted horizontal lines indicate no effect on the outcome as a result of the Seattle Secure Scheduling ordinance.
\newpage

# Replication Tables and Figures

```{r, include=FALSE}
# setwd to where your data is
#setwd(')

#### SET UP ####

# read in data
seattle <- read_dta("Seattle_SSO.dta")

# turn numeric variables into factors
seattle <- seattle %>%
  mutate(across(c(2, 7:17), factor))

# create instrumental variables
seattle <- seattle %>%
  mutate(seapost2 = as.numeric(period == 2 & seattle_treat == 1), 
         seapost1 = as.numeric(period == 1 & seattle_treat == 1))

# subset data to seattle_treat==1 (treament) OR minwage_comp == 1 (control)
seattle<-seattle%>%
  filter(seattle_treat==1|minwage_comp==1)

# create list of imputed datasets (list of 10 dataframes, each with n = 17689)
implist <- lapply(seq(1:10), function(i) {
  seattle %>%
    filter(`_mi_m` == i) %>%
    as.data.frame()
})

implist <- mitml::as.mitml.list(implist)
```


```{r tableA1, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}

# vector of 11 baseline characteristics for dependent vars
dependent_vars <- c("unpred_scale", "shortnotice", "timing", "timingpay", 
                    "oncall", "clopen", "cancelpay", "happy", "distressed", 
                    "goodsleep", "hardship")

# create empty data frame to store estimates in the loop
estimates <- data.frame()

# subset data to baseline (period==0)
seattle_t1<-seattle%>%
  filter(period==0)

# create list of imputed datasets (list of 10 dataframes, each with n = 17689)
implist_t1 <- lapply(seq(1:10), function(i) {
  seattle_t1 %>%
    filter(`_mi_m` == i) %>%
    as.data.frame()
})

implist_t1 <- mitml::as.mitml.list(implist_t1)

for (y in dependent_vars) {
  
  # create formula because R cannot loop over string variables...
  formula <- paste0(y, "~ seattle_treat + age + race + female + 
                     educ + enrolled + cohabstatus + kids + 
                     manager + jobtenure + subsector")
  
  # fit OLS over imputed datasets, at baseline, with acs survey weights                 
  fit <- with(implist, {lm(as.formula(formula), 
                           weights = acs_weight)}) 
  
  # ggpredict returns conditional effects (conditional on factor reference level)
  predictions <- lapply(seq(1:10), function(i) ggpredict(fit[[i]], "seattle_treat"))
  
  # pool predictions over 10 imputed data set estimates
  pooled_predictions <- pool_predictions(predictions)
  
  # keep treatment ind & estimate only,transpose predictions
  pooled_predictions <- pooled_predictions[,1:2]
  pooled_predictions <- as.data.frame(t(pooled_predictions))
  
  # label the comparison and treament columns, remove the 0/1 indicator
  colnames(pooled_predictions)[1] <- "Comparison"
  colnames(pooled_predictions)[2] <- "Treatment"
  pooled_predictions <- pooled_predictions[-1,]
  
  
  # store estimates in data frame
  estimates <- rbind(estimates, pooled_predictions)
}

# convert variables to numeric
estimates$Comparison <- as.numeric(estimates$Comparison)
estimates$Treatment <- as.numeric(estimates$Treatment)

# multiple shares by 100 to get % 
for (i in c(2,3,4,5,6,7,8,9,10,11)){
  estimates[i,] <- (estimates[i,]*100) 
  estimates[i,] <- round(estimates[i,], digits = 1) 
}

# put treatment column on the left, as in Table 1
estimates <- rev(estimates)

# add dep var labels
labels <- c("Unpred. scale (0 to 6)", "Less than 2 wks' notice", "Last-minute change", "Change without pay", "Clopening", "On-call", "Cancel without pay", "Happiness", "Psychological distress", "Good sleep", "Any material hardship")
estimates <- cbind(estimates, labels)



# make table
estimates %>%
  gt(rowname_col = "labels") %>%
  tab_header(
    title = md("**Table A1. Baseline work schedule and well-being outcomes for workers in Seattle and comparison cities**"),
  ) %>%
  tab_row_group(
    label = "Well-being indicators",
    rows = 8:11
  ) %>%
  tab_row_group(
    label = "Work schedules",
    rows = 1:7
  ) %>%
  fmt_number(
    columns = c(Treatment, Comparison),
    decimals = 1
  ) %>%
  cols_label(
    Treatment = "Seattle (%) or mean",
    Comparison = "Comparison cities (%) or mean"
  ) %>%
  tab_source_note(
    "Mean values and percentages are regression-adjusted to control for demographics (age, race/ethnicity, sex, educational attainment, school enrollment, marital status, and parental status) and work characteristics (managerial status, job tenure, and industry subsector). Numbers reflect the dependent variable predicted value, holding convariates at their respective reference group."
  )

```
\newpage



```{r figureA1, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}
## FUNCTION for GGPredict ##
std_margins <- function(outvar) {
  # create formula for linear model
  formula <- paste(outvar, "~ period * seattle_treat + age+race+
                   female+educ+enrolled+
                   cohabstatus+kids+manager+jobtenure+subsector")
  
  # run weighted ols
  fit <- with(implist, lm(as.formula(formula), weights = acs_weight))
  # mod <- lm(formula, data = seattle, weights = seattle$acs_weight)
  
  # predict, can't cluster std errors
  pred_df <- lapply(seq(1:10), 
                    function(i) ggpredict(fit[[i]], 
                                          terms = c("period", "seattle_treat")))
  
  # standardize so that the baseline value is zero
  implist <- as.mitml.list(lapply(
    seq(1:10),
    function(i) {
      implist[[i]] <- implist[[i]] %>%
        mutate("{outvar}_new" := 
                 case_when(as.formula(paste0("seattle_treat == 0 ~", outvar,
                                             "- pred_df[[i]]$predicted[1]")), 
                           as.formula(paste0("seattle_treat == 1 ~", outvar, 
                                             "- pred_df[[i]]$predicted[2]"))))
    }
  ))
  # create formula for new regression
  formula_new <- paste0(outvar, "_new ~ period * seattle_treat + age+race+
                        female+educ+enrolled+cohabstatus+
                        kids+manager+jobtenure+subsector")
  
  # run new weighted ols with standardized variables
  fit_new <- with(implist, lm(as.formula(formula_new), weights = acs_weight))
  # mod_new <- lm(formula_new, data = seattle_std, weights = seattle_std$acs_weight)
  
  # predict on new data set
  pred_df_new <- lapply(seq(1:10), 
                        function(i) ggpredict(fit_new[[i]], 
                                              terms = c("period", "seattle_treat")))
  
  # pool predictions
  pooled <- pool_predictions(pred_df_new)
  
  # save altered dataset to the global environment
  assign("implist", implist, envir = .GlobalEnv)
  
  # return
  return(pooled)
}


#this is going to take a few minutes to run
vals<-lapply(c("shortnotice", "timing", "timingpay", "oncall", "clopen", 
               "cancelpay", "unpred_scale"),std_margins)

## plot each one 

plot_margins_a <- function(df, var) {
  df <- as.data.frame(df)
  ggplot() +
    scale_y_continuous(limits = c(-.8, .2), breaks = c(-.8, -.6, -.4, -.2, 0, .1,  .2)) +
    geom_ribbon(data = df %>% filter(group == 0), aes(x = as.integer(x), 
                                                      y = predicted, 
                                                      ymin = conf.low, 
                                                      ymax = conf.high, 
                                                      alpha = .01, 
                                                      fill = group)) +
    geom_ribbon(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                      y = predicted, 
                                                      ymin = conf.low, 
                                                      ymax = conf.high, 
                                                      alpha = .01, 
                                                      fill = group)) +
    geom_line(data = df %>% filter(group == 0), aes(x = as.integer(x), 
                                                    y = predicted),
              color = "gray", size = 0.5) +
    geom_line(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                    y = predicted), 
              color = "darkgreen", size = 0.5) +
    geom_point(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                     y = predicted), shape = 0) +
    geom_point(data = df %>% filter(group == 0), aes(x = as.integer(x), y = predicted)) +
    scale_x_discrete(name = "", limits = c(1, 2, 3), labels = c("Blin", "Y1", "Y2")) +
    geom_vline(xintercept = c(1.5), linetype = "dotted") +
    scale_fill_manual(values = c("gray", "honeydew2"), name = "", 
                      labels = c("comparison workers", "Seattle workers")) +
    theme_minimal() +
    theme(legend.position = "none") +
    ggtitle(var) +
    theme(plot.title = element_text(hjust = 0.5)) +
    guides(alpha = "none") +
    labs(y = "")
}

a1<-plot_margins_a(vals[7], "unpredictability scale")


plot_margins <- function(df, var) {
  df <- as.data.frame(df)
  ggplot() +
    scale_y_continuous(limits = c(-.3, .18), 
                       breaks = c(-.3, -.25, -.2, -.15, -.10, -.05, 0, .05, .1, .15)) +
    geom_ribbon(data = df %>% filter(group == 0), aes(x = as.integer(x), 
                                                      y = predicted,
                                                      ymin = conf.low, 
                                                      ymax = conf.high, 
                                                      alpha = .01, 
                                                      fill = group)) +
    geom_ribbon(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                      y = predicted, 
                                                      ymin = conf.low, 
                                                      ymax = conf.high, 
                                                      alpha = .01, 
                                                      fill = group)) +
    geom_line(data = df %>% filter(group == 0), aes(x = as.integer(x), 
                                                    y = predicted), 
              color = "gray", size = 0.5) +
    geom_line(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                    y = predicted), 
              color = "darkgreen", size = 0.5) +
    geom_point(data = df %>% filter(group == 1), aes(x = as.integer(x),
                                                     y = predicted), shape = 0) +
    geom_point(data = df %>% filter(group == 0), aes(x = as.integer(x),
                                                     y = predicted)) +
    scale_x_discrete(name = "", limits = c(1, 2, 3), labels = c("Blin", "Y1", "Y2")) +
    geom_vline(xintercept = c(1.5), linetype = "dotted") +
    scale_fill_manual(values = c("gray", "honeydew2"), name = "", 
                      labels = c("comparison workers", "Seattle workers")) +
    theme_minimal() +
    theme(legend.position = "none") +
    ggtitle(var) +
    theme(plot.title = element_text(hjust = 0.5)) +
    guides(alpha = "none") +
    labs(y = "")
}


a<-plot_margins(vals[1], "less than 2 weeks' notice")
b<-plot_margins(vals[2], "shift change")
c<-plot_margins(vals[3], "shift change without pay")
d<-plot_margins(vals[4], "clopening shift")
e<-plot_margins(vals[5], "on-call shift")
f<-plot_margins(vals[6], "cancellation without pay")
```
\newpage

```{r figureA1plot, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE, fig.width=10, fig.height=7}
figure1a <- ggarrange(a1,
                      ncol = 1, nrow = 1,common.legend = TRUE, legend = "bottom")

figure1b <- ggarrange(a, b, c, d, e, f,
                      ncol = 3, nrow = 2,common.legend = TRUE, legend = "bottom")

figure1 <- ggarrange(figure1a, figure1b, ncol=1, nrow=2, widths = c(0.5,1),   
                     heights = c(1,1), labels = c('A', 'B'))

annotate_figure(figure1, top = text_grob("Figure A1: Impacts of Seattle’s Secure Scheduling ordinance on work schedule unpredictability scale (0 to 6)", face = "bold", size = 14))

```
\newpage


```{r figureA2, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}

#run code for figure 1 first
vals<-lapply(c("happy","distressed","goodsleep","hardship"),std_margins)


## plot each one

plot_margins <- function(df, var) {
  df <- as.data.frame(df)
  ggplot() +
    scale_y_continuous(limits = c(-.15, .18), 
                       breaks = c(-.15, -.10, -.05, 0, .05, .1, .15)) +
    geom_ribbon(data = df %>% filter(group == 0), aes(x = as.integer(x), 
                                                      y = predicted, 
                                                      ymin = conf.low, 
                                                      ymax = conf.high, 
                                                      alpha = .01, 
                                                      fill = group)) +
    geom_ribbon(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                      y = predicted, 
                                                      ymin = conf.low, 
                                                      ymax = conf.high, 
                                                      alpha = .01, 
                                                      fill = group)) +
    geom_line(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                    y = predicted), 
              color = "darkgreen", size = 0.5) +
    geom_line(data = df %>% filter(group == 0), aes(x = as.integer(x), 
                                                    y = predicted), 
              color = "gray", size = 0.5) +
    geom_point(data = df %>% filter(group == 1), aes(x = as.integer(x), 
                                                     y = predicted), shape = 0) +
    geom_point(data = df %>% filter(group == 0), aes(x = as.integer(x),
                                                     y = predicted)) +
    scale_x_discrete(name = "", limits = c(1, 2, 3), 
                     labels = c("Blin", "Y1", "Y2")) +
    geom_vline(xintercept = c(1.5), linetype = "dotted") +
    scale_fill_manual(values = c("gray", "honeydew2"), name = "", 
                      labels = c("comparison workers", "Seattle workers")) +
    theme_minimal() +
    theme(legend.position = "none") +
    ggtitle(var) +
    theme(plot.title = element_text(hjust = 0.5)) +
    guides(alpha = "none") +
    labs(y = "")
}


a<-plot_margins(vals[1], "happy")
b<-plot_margins(vals[2], "distressed")
c<-plot_margins(vals[3], "good or very good sleep")
d<-plot_margins(vals[4], "any hardship")
```


```{r figureA2plot, results="asis", eval=TRUE, echo=FALSE,fig.width=10, fig.height=7}
figure2 <- ggarrange(a, b, c,d,
                     ncol = 2, nrow = 2,common.legend = TRUE, legend = "bottom")

annotate_figure(figure2, top = text_grob("Figure A2: Impacts of Seattle’s Secure Scheduling ordinance on worker well-being", face = "bold", size = 14))
```
\newpage



```{r tableA2, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}
#Reload the data, otherwise this doesn't work for some reason

#set working directory
#setwd('')

# read in data
seattle <- read_dta("Seattle_SSO.dta")

# turn numeric variables into factors
seattle <- seattle %>%
  mutate(across(c(2, 7:9, 11, 13, 16, 17), factor))

# create instrumental variables
seattle <- seattle %>%
  mutate(seapost2 = as.numeric(period == 2 & seattle_treat == 1), 
         seapost1 = as.numeric(period == 1 & seattle_treat == 1))

seattle<-seattle%>%
  filter(seattle_treat==1|minwage_comp==1)

# create list of imputed datasets (list of 10 dataframes, each with n = 17689)
implist <- lapply(seq(1:10), function(i) {
  seattle %>%
    filter(`_mi_m` == i) %>%
    as.data.frame()
})

implist <- mitml::as.mitml.list(implist)

#### F Stat ####

# this is done per author's specification, which is non standard
# use "With" and the list of imputed datasets, plus model
# note, I don't cluster SEs because there's now only one entry per respondent id 
#(It throws an error if I cluster)
fit <- with(implist, felm(unpred_scale ~ age + race + female + educ + enrolled +
                            cohabstatus + kids + manager + jobtenure + subsector + 
                            period * seattle_treat, weights = acs_weight))

# puts imputed models together to generate an output
tt<-testEstimates(fit)

fstat <- tt$estimates[, 3]["period2:seattle_treat1"]^2

#### IV Regressions ####

## IV with a linear probability model

linearprob<-with(implist,ivreg(happy~age+race+female+educ+enrolled+
                                 cohabstatus+kids+manager+jobtenure+subsector+period+
                                 seattle_treat|unpred_scale|seapost1+seapost2, 
                               weights = acs_weight))

linearprob2<-with(implist,ivreg(distressed~age+race+female+educ+enrolled+
                                  cohabstatus+kids+manager+jobtenure+subsector+period+
                                  seattle_treat|unpred_scale|seapost1+seapost2, 
                                weights = acs_weight))

linearprob3<-with(implist,ivreg(goodsleep~age+race+female+educ+enrolled+
                                  cohabstatus+kids+manager+jobtenure+subsector+period+
                                  seattle_treat|unpred_scale|seapost1+seapost2, 
                                weights = acs_weight))

linearprob4<-with(implist,ivreg(hardship~age+race+female+educ+enrolled+
                                  cohabstatus+kids+manager+jobtenure+subsector+period+
                                  seattle_treat|unpred_scale|seapost1+seapost2, 
                                weights = acs_weight))
```


```{r texregfunction, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}
# create extraction function for mitml class functions
# so that we can use texreg to make a table
extract.mitml <- function(model) {
  s <- testEstimates(model)
  names <- rownames(s$estimates)
  co <- s$estimates[, 1]
  se <- s$estimates[, 2]
  pval <- s$estimates[, 5]
  # n <- length(model[[1]]$residuals)
  f <- fstat
  gof <- c(f)
  gof.names <- c("F. Statistic")
  tr <- createTexreg(
    coef.names = names,
    coef = co,
    se = se,
    pvalues = pval,
    gof.names = gof.names,
    gof = gof
  )
  return(tr)
}
setMethod("extract",
          signature = className("mitml.result", "mitml"),
          definition = extract.mitml
)
```




```{r tableA2print, results="asis", eval=TRUE, echo=FALSE,}
## Create Table 2 ##
texreg(list(linearprob, linearprob2, linearprob3, linearprob4),
       custom.coef.map = list("unpred_scale" = "Unpred. coef"),
       stars = c(0.001, 0.01, 0.05, 0.1),
       custom.model.names = c("Happiness", "Psychological distress", "Good sleep", 
                              "Any material hardship"),
       caption = "Table A2: Two-stage least squares estimates of causal effects of schedule unpredictability scale on well-being outcomes (n = 17,689)",
       caption.above = TRUE
)
```



\newpage
```{r tableA3, eval=TRUE, echo=FALSE, message = FALSE, warning = FALSE}
# predict on unpred_scale
table3_happy <- lapply(seq(1:10), function(i) ggpredict(linearprob[[i]],
                                                        terms = "unpred_scale"))
table3_distressed <- lapply(seq(1:10), function(i) ggpredict(linearprob2[[i]], 
                                                             terms = "unpred_scale"))
table3_goodsleep <- lapply(seq(1:10), function(i) ggpredict(linearprob3[[i]],
                                                            terms = "unpred_scale"))
table3_hardship <- lapply(seq(1:10), function(i) ggpredict(linearprob4[[i]], 
                                                           terms = "unpred_scale"))
# pool predictions
pooled1 <- pool_predictions(table3_happy)
pooled2 <- pool_predictions(table3_distressed)
pooled3 <- pool_predictions(table3_goodsleep)
pooled4 <- pool_predictions(table3_hardship)

#update to add appropriate degrees of freedom from the multiple imputation
pooled1<-cbind(pooled1,df=testEstimates(linearprob)$estimates[,4][2])
pooled2<-cbind(pooled2,df=testEstimates(linearprob2)$estimates[,4][2])
pooled3<-cbind(pooled3,df=testEstimates(linearprob3)$estimates[,4][2])
pooled4<-cbind(pooled4,df=testEstimates(linearprob4)$estimates[,4][2])

#Function to print pooled coefficients
extract.pooled <- function(pool) {
  names <- pool$x
  co <- pool$predicted
  se <- pool$`std.error`
  df<-pool$df
  tval<-pool$predicted/pool$`std.error`
  pval <- pt(tval,df,lower.tail=FALSE)*2
  tr <- createTexreg(
    coef.names = as.character(names),
    coef = co,
    se = se,
    pvalues = pval,
  )
  return(tr)
}


```
\newpage


```{r tableA3print, results="asis", eval=TRUE, echo=FALSE}

texreg(list(extract.pooled(pooled1),extract.pooled(pooled2),
            extract.pooled(pooled3),extract.pooled(pooled4)), 
       custom.coef.map=list("3"="Actual 3 of 6 types \ Unpredictability",
                            "0"="Simulated 0 of 6 types \ Unpredictability"),
       stars = c(0.001, 0.01, 0.05, 0.1),
       custom.model.names = c("Happiness", "Psychological distress", "Good sleep", 
                              "Any material hardship"),
       caption = "Table A3: Predicted values for well-being outcomes for workers with 
       average unpredictability or simulated elimination of unpredictability 
       (n = 17,689)", caption.above = TRUE)
```

